**************************
***	RUN USING STATA 16 ***
**************************

* Purpose: decompose the change in pass rate into a test component and a cohort (other) component
* Last updated: 20 Apr 2023

*------------------------------------------------------------------------------
*** import IRT estimates
	use "$output/mat_score_estimates.dta", clear
	
*** reshape for distribution of scores by exam year
*** (keep a single row per year, then stack the wide ability_* / tot*
*** columns so that each row is one year-by-index observation)
	egen pick = tag(yr) //retain one observation per year
	keep if pick==1
	keep yr ability* tot* marks_yr
	reshape long ability_ tot, i(yr) j(t)
	drop t
	
*** put in rank order and calculate pct correct
*** n is the within-year rank by ascending ability_. The sort keys are given
*** inside the by-prefix so the ranking does not depend on a previous sort
*** being left untouched; tot is added as a tie-breaker so observations with
*** equal ability_ are ordered the same way on every run.
	bysort yr (ability_ tot) : gen n = _n
	gen pct = tot/marks_yr //share of that year's available marks
		
*** flag the candidate sitting at the pass threshold in each exam year
*** (rank position taken from the WAEC pass rate for that year)
*** pooled reference, so no single base year (2017 is left out)
	gen cut = .
	* the two lists are aligned position by position: k-th year <-> k-th rank
	local cut_years 2011 2012 2013 2014 2015 2016 2018 2019
	local cut_ranks  232  180  280  320  458  380  320  156
	forvalues k = 1/8 {
		local y : word `k' of `cut_years'
		local r : word `k' of `cut_ranks'
		replace cut = 1 if yr==`y' & n==`r'
	}

*** identify relevant pooled pct and pooled ability at average cut
*** marks = mean pct over all flagged threshold candidates (all years pooled)
*** level = mean ability_ over the same flagged candidates
	gen g = pct if cut==1
	egen marks = mean(g)
	gen h = ability_ if cut==1
	egen level = mean(h)
	gen fail_us = 1 if pct < marks       //below pooled pct-correct threshold
	gen ref = 1 if ability_ < level      //below pooled ability threshold
	gen fail_waec = n if cut==1          //rank of that year's threshold candidate
	* NOTE(review): fail_us and ref count observations strictly below the
	* threshold, while fail_waec is the rank of the threshold candidate
	* itself -- confirm the intended treatment of that one candidate.
	* n_cand = number of non-missing ability_ values per year; it is used
	* below as the denominator when converting counts to percentage points.
	collapse (count) fail_us ref n_cand=ability_ (mean) fail_waec, by(yr)
	
*** calculate excess failures
	gen xs_test = ref - fail_us
	gen xs_kids = (ref - fail_waec) - xs_test
	* convert counts to percentage points using the actual number of
	* candidates per year (equals the former /10 when there are 1000)
	gen xs_test_pp = 100*xs_test/n_cand
	gen xs_kids_pp = 100*xs_kids/n_cand
	drop n_cand
		
*** prepare for figure
*** 2017 is blanked out so that no bar is drawn for it
	replace xs_test_pp = . if yr==2017
	replace xs_kids_pp = . if yr==2017	
*** y1/y2 are the lower/upper ends of the cohort bar: when both components
*** have the same sign the cohort bar is stacked on top of the test bar,
*** otherwise it starts from zero
	gen y1 = 0
	* missing values compare as larger than any number in Stata, so the
	* product test is guarded explicitly against missing components
	replace y1 = xs_test_pp if xs_test_pp*xs_kids_pp>0 & !missing(xs_test_pp, xs_kids_pp)
	gen y2 = xs_kids_pp + y1

	tw ///
	(bar xs_test_pp yr, fc(black) lc(black) lw(none) fi(100) barwidth(0.8)) ///
	(rbar y1 y2 yr, fc(white) fi(100) lcolor(black) lw(thin) barwidth(0.8)) ///
	, xlabel(2011(1)2019, nogrid labsize(10pt)) ///
	ylabel(-20(5)20, labsize(10pt))	///
	legend(lab(1 "Test") lab(2 "Cohort") size(10pt) pos(6) col(2)) ///
	ytitle("Percentage Points", size(10pt)) xtitle("Year", size(10pt)) ///
	xsize(4.5) ysize(3)
	* forward slashes work as directory separators on every platform
	graph export "$graph/3b.png", replace
	graph export "$graph/3b.eps", replace
	
*** calculate excess failures (values for Table 4)
*** merges the yearly results with waec_pass.dta on yr, then scales the
*** test component (in percentage points) by mat_graded
	* forward slash keeps the path valid on every platform
	merge 1:1 yr using "$input/waec_pass.dta"
	keep if yr>2010
	* presumably mat_graded comes from waec_pass.dta -- verify against that file
	* dividing by -100 converts percentage points to a share and flips the sign
	gen xs_candidates = round(mat_graded*xs_test_pp/-100, 1)
	list yr mat_graded xs_candidates, sep(0)
	
	
	
	